meta_analysis
1 PREFACE
1.1 PROJECT
burden analysis of extended gene regions (including regulatory regions) in sporadic breast cancer across diverse ancestries
1.2 OBJECTIVE
combine raw results across ancestries and substudies in a meta-analysis
1.3 load libraries
1.4 Version Info
R version 3.6.3 (2020-02-29)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.5 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
locale:
[1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
[5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
[7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] metap_1.3 data.table_1.12.8 forcats_0.5.0 stringr_1.4.0
[5] dplyr_0.8.5 purrr_0.3.4 readr_1.3.1 tidyr_1.0.3
[9] tibble_3.0.1 ggplot2_3.3.0 tidyverse_1.3.0 rmdformats_0.3.7
[13] knitr_1.28
loaded via a namespace (and not attached):
[1] Biobase_2.46.0 httr_1.4.1 jsonlite_1.7.1
[4] splines_3.6.3 modelr_0.1.7 sn_1.6-2
[7] Rdpack_0.11-1 assertthat_0.2.1 stats4_3.6.3
[10] TFisher_0.2.0 cellranger_1.1.0 yaml_2.2.1
[13] numDeriv_2016.8-1.1 pillar_1.4.4 backports_1.1.7
[16] lattice_0.20-41 glue_1.4.2 digest_0.6.26
[19] rvest_0.3.5 colorspace_1.4-1 sandwich_2.5-1
[22] htmltools_0.5.0 Matrix_1.2-18 pkgconfig_2.0.3
[25] bibtex_0.4.2.2 broom_0.5.6 haven_2.3.1
[28] bookdown_0.19 mvtnorm_1.1-1 scales_1.1.1
[31] generics_0.0.2 ellipsis_0.3.1 TH.data_1.0-10
[34] withr_2.3.0 BiocGenerics_0.32.0 cli_2.0.2
[37] mnormt_1.5-7 survival_3.2-7 magrittr_1.5
[40] crayon_1.3.4 readxl_1.3.1 evaluate_0.14
[43] fs_1.4.1 fansi_0.4.1 MASS_7.3-53
[46] nlme_3.1-149 xml2_1.3.2 tools_3.6.3
[49] hms_0.5.3 gbRd_0.4-11 formatR_1.7
[52] lifecycle_0.2.0 multcomp_1.4-13 mutoss_0.1-12
[55] munsell_0.5.0 reprex_0.3.0 plotrix_3.7-8
[58] compiler_3.6.3 rlang_0.4.8 grid_3.6.3
[61] rstudioapi_0.11 rmarkdown_2.1 multtest_2.42.0
[64] codetools_0.2-16 gtable_0.3.0 DBI_1.1.0
[67] R6_2.4.1 zoo_1.8-8 lubridate_1.7.8
[70] stringi_1.4.6 parallel_3.6.3 Rcpp_1.0.5
[73] vctrs_0.3.0 dbplyr_1.4.3 tidyselect_1.1.0
[76] xfun_0.13
1.5 Set Global Variables
1.6 Plot Theme
> # 1.6 Shared ggplot theme for this report: black-and-white base with
> # enlarged bold titles, enlarged axis/legend/facet-strip text, wider plot
> # margins and grey major gridlines. Added (`+ my_ggplotTheme`) to every
> # plot below.
> my_ggplotTheme = theme_bw() + theme(title = element_text(size = rel(1.3), face = "bold"),
+ axis.title.y = element_text(vjust = 1.5), axis.title.x = element_text(vjust = -1.5),
+ axis.text = element_text(size = rel(1.3)), axis.text.x = element_text(hjust = 1),
+ legend.text = element_text(size = rel(1.3)), strip.text = element_text(size = rel(1.3)),
+ plot.margin = unit(c(1, 1, 1, 2), "cm"), panel.grid.major = element_line(colour = "grey60"))1.7 define some functions
1.7.1 Genomic Control Correction
> # Genomic-control correction of a vector of P values.
> # Pvalue : numeric vector of P values from 1-df tests (NAs tolerated)
> # returns: numeric vector of GC-corrected P values, same length/order
> # Each P value is converted to a 1-df chi-square statistic, the inflation
> # factor lambda is estimated as median(chisq)/qchisq(0.5, 1), the
> # statistics are divided by lambda, and converted back to P values.
> GC_correction = function(Pvalue) {
+ # derive chi-square statistics from PValues assuming 1 degree of freedom
+ chisq_distribution <- qchisq(Pvalue, 1, lower.tail = F)
+ # calculate lambda from derived chi-square
+ lambda = median(chisq_distribution, na.rm = T)/qchisq(0.5, 1)
+ # NOTE(review): if lambda < 1 (deflation) this division makes the P values
+ # SMALLER; GC is commonly applied only when lambda > 1 -- confirm intended.
+ # use genomic control to calculate corrected PValues
+ out = pchisq(q = (chisq_distribution/lambda), df = 1, lower.tail = F)
+ return(out)
+ }1.7.2 make QQ plots with ggplot
> # QQ plot of observed vs expected -log10(P) with a pointwise confidence
> # ribbon.
> # pvalues : numeric vector of P values (NAs are dropped)
> # ci      : confidence level of the ribbon (default 0.95)
> # returns : a ggplot object
> gg_qqplot <- function(pvalues, ci = 0.95) {
+
+ # function to create qq plot in ggplot of -log10 P values source:
+ # https://slowkow.com/notes/ggplot2-qqplot/
+
+ ps = pvalues[!is.na(pvalues)]
+ n <- length(ps)
+ # expected quantiles come from ppoints(n); the ribbon bounds are beta
+ # quantiles, Beta(k, n - k + 1) being the distribution of the k-th order
+ # statistic of n uniform P values under the null
+ df <- data.frame(observed = -log10(sort(ps)), expected = -log10(ppoints(n)),
+ clower = -log10(qbeta(p = (1 - ci)/2, shape1 = 1:n, shape2 = n:1)), cupper = -log10(qbeta(p = (1 +
+ ci)/2, shape1 = 1:n, shape2 = n:1)))
+ log10Pe <- expression(paste("Expected -log"[10], plain(P)))
+ log10Po <- expression(paste("Observed -log"[10], plain(P)))
+ ggplot(df) + geom_ribbon(mapping = aes(x = expected, ymin = clower, ymax = cupper),
+ alpha = 0.1) + geom_point(aes(expected, observed)) + geom_abline(intercept = 0,
+ slope = 1, alpha = 0.5) + # geom_line(aes(expected, cupper), linetype = 2, size = 0.5) +
+ # geom_line(aes(expected, clower), linetype = 2, size = 0.5) +
+ xlab(log10Pe) + ylab(log10Po)
+ }1.7.3 METAL meta analysis method, sample size based and not inverse-variance based
# METAL sample-size-weighted (Stouffer-type) meta-analysis of P values.
#
# Reference: Table 1 in "METAL: fast and efficient meta-analysis of
# genomewide association scans", doi:10.1093/bioinformatics/btq340
#
# --- INPUTS
# pvalues : vector of two-sided P values, one per cohort
# weights : vector of sqrt(sample size) per cohort, same order as pvalues
#
# --- OUTPUT
# a single two-sided meta-analysis P value
#
# FIX(review): cohorts with a missing P value previously still contributed
# their weight to the denominator sqrt(sum(weights^2)), shrinking the meta
# Z score towards 0 and inflating the meta P value. Weights are now masked
# wherever the corresponding P value is NA so numerator and denominator use
# the same set of cohorts.
METAL_Sample_size_based_meta_PValue = function(pvalues, weights) {

    # drop the weight of any cohort without a P value (local copy only)
    weights[is.na(pvalues)] <- NA

    # --- 1. z statistic from each P value, two-tailed test; this yields
    # |Z| only -- burden-test P values carry no direction of effect
    Z_i = qnorm(pvalues/2, lower.tail = FALSE)

    # --- 2. meta-analysis z score, equation from the METAL paper
    meta_Z = sum(Z_i * weights, na.rm = T)/sqrt(sum(weights^2, na.rm = T))

    # --- 3. meta-analysis P value, two-tailed test
    meta_P = pnorm(meta_Z, lower.tail = F) * 2

    # --- OUTPUT
    return(meta_P)
}
# 2 Load inputs
2.1 Load Gene Overview data
2.2 Load data for ancestries
2.2.1 Asian ancestry
> # 2.2.x Load the per-cohort MUMMY burden-test summaries. For each cohort:
> # keep gene id, SNP QC counts, analysed variant count, raw and
> # FDR-adjusted P values; tag rows with the ancestry label and case/control
> # counts; drop genes without a P value or where
> # SNPfailingMAF + SNPfailingInfo >= 2 * SNPinROI; add GC-corrected P
> # values. NOTE(review): a SNP failing both thresholds is counted twice in
> # that sum, so the filter only removes genes where every SNP failed BOTH
> # filters -- confirm this is the intended QC rule.
> asian = fread(paste0(INPUTFOLDER, "ASIAN_MUMMY_SUMMARY.txt")) %>% dplyr::select(CurrentGene,
+ SNPinROI, SNPfailingInfoThreshold, SNPfailingMAFThreshold, VariantNumber, PValue,
+ P.fdr) %>% mutate(ancestry = "asian", cases = 8390, controls = 6831) %>% filter(!is.na(PValue),
+ SNPfailingMAFThreshold + SNPfailingInfoThreshold < 2 * SNPinROI) %>% mutate(PValueGC = GC_correction(PValue))2.2.2 African ancestry
> african = fread(paste0(INPUTFOLDER, "AFRICAN_MUMMY_SUMMARY.txt")) %>% dplyr::select(CurrentGene,
+ SNPinROI, SNPfailingInfoThreshold, SNPfailingMAFThreshold, VariantNumber, PValue,
+ P.fdr) %>% mutate(ancestry = "african", cases = 3678, controls = 2056) %>% filter(!is.na(PValue),
+ SNPfailingMAFThreshold + SNPfailingInfoThreshold < 2 * SNPinROI) %>% mutate(PValueGC = GC_correction(PValue))2.2.3 Hispanic ancestry
> hispanic = fread(paste0(INPUTFOLDER, "HISPANIC_MUMMY_SUMMARY.txt")) %>% dplyr::select(CurrentGene,
+ SNPinROI, SNPfailingInfoThreshold, SNPfailingMAFThreshold, VariantNumber, PValue,
+ P.fdr) %>% mutate(ancestry = "hispanic", cases = 1335, controls = 1216) %>% filter(!is.na(PValue),
+ SNPfailingMAFThreshold + SNPfailingInfoThreshold < 2 * SNPinROI) %>% mutate(PValueGC = GC_correction(PValue))2.2.4 EUROPEAN eur01a
> eur01A = fread(paste0(INPUTFOLDER, "EUROPEAN_eur01A_MUMMY_SUMMARY.txt")) %>% dplyr::select(CurrentGene,
+ SNPinROI, SNPfailingInfoThreshold, SNPfailingMAFThreshold, VariantNumber, PValue,
+ P.fdr) %>% mutate(ancestry = "european01A", cases = 6350, controls = 4802) %>%
+ filter(!is.na(PValue), SNPfailingMAFThreshold + SNPfailingInfoThreshold < 2 *
+ SNPinROI) %>% mutate(PValueGC = GC_correction(PValue))2.2.5 EUROPEAN eur01b
> eur01B = fread(paste0(INPUTFOLDER, "EUROPEAN_eur01B_MUMMY_SUMMARY.txt")) %>% dplyr::select(CurrentGene,
+ SNPinROI, SNPfailingInfoThreshold, SNPfailingMAFThreshold, VariantNumber, PValue,
+ P.fdr) %>% mutate(ancestry = "european01B", cases = 5187, controls = 4238) %>%
+ filter(!is.na(PValue), SNPfailingMAFThreshold + SNPfailingInfoThreshold < 2 *
+ SNPinROI) %>% mutate(PValueGC = GC_correction(PValue))2.2.6 EUROPEAN eur01c
> eur01C = fread(paste0(INPUTFOLDER, "EUROPEAN_eur01C_MUMMY_SUMMARY.txt")) %>% dplyr::select(CurrentGene,
+ SNPinROI, SNPfailingInfoThreshold, SNPfailingMAFThreshold, VariantNumber, PValue,
+ P.fdr) %>% mutate(ancestry = "european01C", cases = 5982, controls = 5422) %>%
+ filter(!is.na(PValue), SNPfailingMAFThreshold + SNPfailingInfoThreshold < 2 *
+ SNPinROI) %>% mutate(PValueGC = GC_correction(PValue))2.2.7 EUROPEAN eur01d
> eur01D = fread(paste0(INPUTFOLDER, "EUROPEAN_eur01D_MUMMY_SUMMARY.txt")) %>% dplyr::select(CurrentGene,
+ SNPinROI, SNPfailingInfoThreshold, SNPfailingMAFThreshold, VariantNumber, PValue,
+ P.fdr) %>% mutate(ancestry = "european01D", cases = 5012, controls = 4552) %>%
+ filter(!is.na(PValue), SNPfailingMAFThreshold + SNPfailingInfoThreshold < 2 *
+ SNPinROI) %>% mutate(PValueGC = GC_correction(PValue))2.2.8 EUROPEAN eur02a
> eur02A = fread(paste0(INPUTFOLDER, "EUROPEAN_eur02A_MUMMY_SUMMARY.txt")) %>% dplyr::select(CurrentGene,
+ SNPinROI, SNPfailingInfoThreshold, SNPfailingMAFThreshold, VariantNumber, PValue,
+ P.fdr) %>% mutate(ancestry = "european02A", cases = 7576, controls = 2124) %>%
+ filter(!is.na(PValue), SNPfailingMAFThreshold + SNPfailingInfoThreshold < 2 *
+ SNPinROI) %>% mutate(PValueGC = GC_correction(PValue))2.2.9 EUROPEAN eur02b
> eur02B = fread(paste0(INPUTFOLDER, "EUROPEAN_eur02B_MUMMY_SUMMARY.txt")) %>% dplyr::select(CurrentGene,
+ SNPinROI, SNPfailingInfoThreshold, SNPfailingMAFThreshold, VariantNumber, PValue,
+ P.fdr) %>% mutate(ancestry = "european02B", cases = 7953, controls = 2347) %>%
+ filter(!is.na(PValue), SNPfailingMAFThreshold + SNPfailingInfoThreshold < 2 *
+ SNPinROI) %>% mutate(PValueGC = GC_correction(PValue))2.2.10 EUROPEAN eur03a
> eur03A = fread(paste0(INPUTFOLDER, "EUROPEAN_eur03A_MUMMY_SUMMARY.txt")) %>% dplyr::select(CurrentGene,
+ SNPinROI, SNPfailingInfoThreshold, SNPfailingMAFThreshold, VariantNumber, PValue,
+ P.fdr) %>% mutate(ancestry = "european03A", cases = 5049, controls = 3472) %>%
+ filter(!is.na(PValue), SNPfailingMAFThreshold + SNPfailingInfoThreshold < 2 *
+ SNPinROI) %>% mutate(PValueGC = GC_correction(PValue))2.2.11 EUROPEAN eur03b
> eur03B = fread(paste0(INPUTFOLDER, "EUROPEAN_eur03B_MUMMY_SUMMARY.txt")) %>% dplyr::select(CurrentGene,
+ SNPinROI, SNPfailingInfoThreshold, SNPfailingMAFThreshold, VariantNumber, PValue,
+ P.fdr) %>% mutate(ancestry = "european03B", cases = 4057, controls = 3647) %>%
+ filter(!is.na(PValue), SNPfailingMAFThreshold + SNPfailingInfoThreshold < 2 *
+ SNPinROI) %>% mutate(PValueGC = GC_correction(PValue))2.2.12 EUROPEAN eur04
> eur04 = fread(paste0(INPUTFOLDER, "EUROPEAN_eur04_MUMMY_SUMMARY.txt")) %>% dplyr::select(CurrentGene,
+ SNPinROI, SNPfailingInfoThreshold, SNPfailingMAFThreshold, VariantNumber, PValue,
+ P.fdr) %>% mutate(ancestry = "european04", cases = 6192, controls = 8323) %>%
+ filter(!is.na(PValue), SNPfailingMAFThreshold + SNPfailingInfoThreshold < 2 *
+ SNPinROI) %>% mutate(PValueGC = GC_correction(PValue))2.2.13 EUROPEAN eur05a
> eur05A = fread(paste0(INPUTFOLDER, "EUROPEAN_eur05A_MUMMY_SUMMARY.txt")) %>% dplyr::select(CurrentGene,
+ SNPinROI, SNPfailingInfoThreshold, SNPfailingMAFThreshold, VariantNumber, PValue,
+ P.fdr) %>% mutate(ancestry = "european05A", cases = 5698, controls = 3869) %>%
+ filter(!is.na(PValue), SNPfailingMAFThreshold + SNPfailingInfoThreshold < 2 *
+ SNPinROI) %>% mutate(PValueGC = GC_correction(PValue))2.2.14 EUROPEAN eur05b
> eur05B = fread(paste0(INPUTFOLDER, "EUROPEAN_eur05B_MUMMY_SUMMARY.txt")) %>% dplyr::select(CurrentGene,
+ SNPinROI, SNPfailingInfoThreshold, SNPfailingMAFThreshold, VariantNumber, PValue,
+ P.fdr) %>% mutate(ancestry = "european05B", cases = 5298, controls = 3532) %>%
+ filter(!is.na(PValue), SNPfailingMAFThreshold + SNPfailingInfoThreshold < 2 *
+ SNPinROI) %>% mutate(PValueGC = GC_correction(PValue))2.2.15 EUROPEAN eur05c
> eur05C = fread(paste0(INPUTFOLDER, "EUROPEAN_eur05C_MUMMY_SUMMARY.txt")) %>% dplyr::select(CurrentGene,
+ SNPinROI, SNPfailingInfoThreshold, SNPfailingMAFThreshold, VariantNumber, PValue,
+ P.fdr) %>% mutate(ancestry = "european05C", cases = 5626, controls = 3366) %>%
+ filter(!is.na(PValue), SNPfailingMAFThreshold + SNPfailingInfoThreshold < 2 *
+ SNPinROI) %>% mutate(PValueGC = GC_correction(PValue))3 Quality Control
3.1 Combined data
> # 3.1 Stack per-cohort QC columns into one long table for the QC plots.
> # Columns are picked by POSITION: 1-5 = CurrentGene, SNPinROI,
> # SNPfailingInfoThreshold, SNPfailingMAFThreshold, VariantNumber; 8 = the
> # 'ancestry' label added by mutate() above. NOTE(review): positional
> # indexing breaks silently if the select()/mutate() order upstream
> # changes -- selecting by name would be safer.
> QC_stuff = asian[, c(1, 2, 3, 4, 5, 8)] %>% rbind(african[, c(1, 2, 3, 4, 5, 8)]) %>%
+ rbind(hispanic[, c(1, 2, 3, 4, 5, 8)]) %>% rbind(eur01A[, c(1, 2, 3, 4, 5, 8)]) %>%
+ rbind(eur01B[, c(1, 2, 3, 4, 5, 8)]) %>% rbind(eur01C[, c(1, 2, 3, 4, 5, 8)]) %>%
+ rbind(eur01D[, c(1, 2, 3, 4, 5, 8)]) %>% rbind(eur02A[, c(1, 2, 3, 4, 5, 8)]) %>%
+ rbind(eur02B[, c(1, 2, 3, 4, 5, 8)]) %>% rbind(eur03A[, c(1, 2, 3, 4, 5, 8)]) %>%
+ rbind(eur03B[, c(1, 2, 3, 4, 5, 8)]) %>% rbind(eur04[, c(1, 2, 3, 4, 5, 8)]) %>%
+ rbind(eur05A[, c(1, 2, 3, 4, 5, 8)]) %>% rbind(eur05B[, c(1, 2, 3, 4, 5, 8)]) %>%
+ rbind(eur05C[, c(1, 2, 3, 4, 5, 8)])3.2 Number of genes with results per cohort
> # 3.2 Bar chart: genes with results per cohort (filled bars) overlaid on
> # the total number of genes targeted (hollow outline bars, from
> # gene_overview), saved to PDF.
> # NOTE(review): chaining ggsave() onto the plot with '+' saves via side
> # effect (ggsave defaults to the last plot); calling ggsave() separately
> # is the conventional, less fragile form.
> QC_stuff %>% count(ancestry) %>% mutate(OVERALL = nrow(gene_overview)) %>% ggplot(aes(y = reorder(ancestry,
+ n), x = n)) + geom_bar(stat = "identity", fill = "black", color = "black") +
+ geom_bar(aes(x = OVERALL), stat = "identity", fill = NA, color = "black") + labs(x = "Number of genes",
+ y = "Cohort", title = "Number of Genes with results", subtitle = paste0("Total number genes to be analysed ",
+ nrow(gene_overview))) + my_ggplotTheme + theme(axis.text.x = element_text(hjust = 0.5)) +
+ ggsave(paste0(OUTFOLDER, SCRIPT, ".supp.number_genes_per_cohort.", DATE, ".pdf"),
+ device = "pdf", height = 10)3.3 Number of analysed SNPs per Gene in each cohort
Distribution in cohorts of African and Hispanic ancestry is slightly skewed to the right compared to other cohorts.
> # 3.3 Histogram, faceted by cohort: number of SNPs that entered the
> # MONSTER analysis per gene (x-axis truncated at 1000).
> QC_stuff %>% ggplot(aes(x = VariantNumber, color = ancestry, fill = ancestry)) +
+ geom_histogram(bins = 50) + facet_grid(ancestry ~ .) + labs(title = paste0("Number of analysed SNPs per gene"),
+ x = "Variants in MONSTER analysis per Gene", y = "Gene Count") + xlim(0, 1000) +
+ my_ggplotTheme3.4 Proportion of SNPs per gene removed due to missing INFO or MAF threshold?
In Hispanic and African ancestry samples a smaller proportion of SNPs per gene was removed, possibly due to a higher rate of rare variants in these ancestries.
> # 3.4 Histogram, faceted by cohort: per-gene proportion of SNPs removed
> # by either QC filter (INFO or MAF threshold), relative to SNPs in the
> # region of interest. NOTE(review): a SNP failing both filters is counted
> # twice, so the proportion can exceed 1.
> QC_stuff %>% mutate(PROP_removed_snps = (SNPfailingInfoThreshold + SNPfailingMAFThreshold)/SNPinROI) %>%
+ ggplot(aes(x = PROP_removed_snps, color = ancestry, fill = ancestry)) + geom_histogram() +
+ facet_grid(ancestry ~ .) + labs(title = paste0("Proportion of removed SNPs per gene"),
+ x = "Proportion of removed SNPs per gene", y = "Gene Count") + my_ggplotTheme +
+ xlim(-0.1, 1)3.5 Proportion of SNPs per gene removed due to low imputation quality
In European cohorts remarkably more SNPs were removed due to low INFO scores.
> # 3.5 Histogram, faceted by cohort: per-gene proportion of SNPs removed
> # for failing the imputation-quality (INFO) threshold.
> QC_stuff %>% ggplot(aes(x = SNPfailingInfoThreshold/SNPinROI, color = ancestry, fill = ancestry)) +
+ geom_histogram() + facet_grid(ancestry ~ .) + labs(title = paste0("Proportion of SNPs removed due to low imputation quality per gene"),
+ x = "Proportion of removed SNPs per gene", y = "Gene count") + my_ggplotTheme3.6 Proportion of SNPs per gene removed due to MAF >0.05
> # 3.6 Histogram, faceted by cohort: per-gene proportion of SNPs removed
> # for failing the MAF threshold (MAF > 0.05, i.e. not rare).
> QC_stuff %>% ggplot(aes(x = SNPfailingMAFThreshold/SNPinROI, color = ancestry, fill = ancestry)) +
+ geom_histogram() + facet_grid(ancestry ~ .) + labs(title = paste0("Proportion of SNPs removed with MAF >0.05 quality per gene"),
+ x = "Proportion of removed SNPs per gene", y = "Gene Count") + my_ggplotTheme3.7 QQ Plots and Genomic Inflation
3.7.1 function to perform calculations and make qqPlot
> # 3.7.1 Compute genomic-inflation metrics and a QQ plot for one cohort.
> # PValueVector : one cohort's results table (needs columns PValue, cases,
> #                controls, ancestry) -- despite the name this is a data
> #                frame, not a bare vector
> # returns      : list(QQ ggplot object, lambda, lambda1000)
> qq_lambda = function(PValueVector) {
+
+ data = PValueVector
+
+ # calculate lambda, defined as median(chisq) / qchisq(0.5, 1)
+ # (rows with NA PValue were filtered out at load time, so no na.rm here)
+ pvalue <- data$PValue
+ chisq <- qchisq(pvalue, 1, lower.tail = F)
+ lambda = median(chisq)/qchisq(0.5, 1)
+
+ # calculate lambda1000: inflation rescaled to an equivalent study of
+ # 1000 cases and 1000 controls, to compare cohorts of different size
+ lambda1000 = 1 + (lambda - 1) * (1/data$cases[1] + 1/data$controls[1])/(1/1000 +
+ 1/1000)
+
+ # create QQ Plot
+ QQ = gg_qqplot(data$PValue) + labs(title = paste0("QQ Plot Pvalues in ", data$ancestry[1],
+ " ancestry samples"), subtitle = paste0("number samples = ", data$cases[1] +
+ data$controls[1], ", number genes = ", nrow(data[!is.na(data$PValue), ]),
+ "\nlambda = ", round(lambda, 3), ", lambda1000 = ", round(lambda1000, 3))) +
+ my_ggplotTheme
+
+ out = list(QQ, lambda, lambda1000)
+ return(out)
+ }3.7.2 run function over all cohorts and save qqplots
> # 3.7.2 Run qq_lambda() over every cohort object (looked up by name via
> # get()), save each QQ plot to PDF and collect lambda / lambda1000 into a
> # summary list.
> all_subcohorts = c("asian", "african", "hispanic", "eur01A", "eur01B", "eur01C",
+ "eur01D", "eur02A", "eur02B", "eur03A", "eur03B", "eur04", "eur05A", "eur05B",
+ "eur05C")
>
> # collect lambda, lambda1000 estimates (preallocated, one slot per cohort)
> lambda.summary.list = vector("list", length(all_subcohorts))
>
> for (i in seq_along(all_subcohorts)) {
+
+ out = qq_lambda(get(all_subcohorts[i]))
+
+ # PLOT
+ ggsave(plot = out[[1]], filename = paste0(OUTFOLDER, SCRIPT, ".supp.QQ.", all_subcohorts[i],
+ ".", DATE, ".pdf"), device = "pdf", dpi = 300)
+
+ # FIX(review): the cohort tables name these columns 'cases'/'controls'
+ # (see the mutate() calls in section 2.2); the previous '$case'/'$control'
+ # relied on partial matching, which does not work on data.tables/tibbles
+ # and would silently yield NULL
+ lambda.summary.list[[i]] = data.frame(ancestry = all_subcohorts[i], case = get(all_subcohorts[i])$cases[1],
+ control = get(all_subcohorts[i])$controls[1], lambda = out[[2]], lambda1000 = out[[3]])
+
+ }3.7.3 save lamdba and lambda1000 metrics
3.7.4 Lambda
4 Combine association data
> # 4. Join all cohort result tables into one wide per-gene table. After
> # each full_join the newly added cohort's columns are renamed with a
> # cohort prefix; the positional 'select(1:n, ...)' keeps the columns
> # accumulated so far (fragile, but preserved as-is).
> # FIX(review): column name typo 'Afrian_Num_Variants' corrected to
> # 'African_Num_Variants'.
> assocs = asian %>% select(1, Asian_Num_Variants = 5, Asian_PValue = 6, Asian_FDR = 7,
+ Asian_PValueGC = 11) %>% full_join(african) %>% select(1:5, African_Num_Variants = VariantNumber,
+ African_PValue = PValue, African_FDR = P.fdr, African_PValueGC = PValueGC) %>%
+ full_join(hispanic) %>% select(1:9, Hispanic_Num_Variants = VariantNumber, Hispanic_PValue = PValue,
+ Hispanic_FDR = P.fdr, Hispanic_PValueGC = PValueGC) %>% full_join(eur01A) %>%
+ select(1:13, Eur01A_Num_Variants = VariantNumber, Eur01A_PValue = PValue, Eur01A_FDR = P.fdr,
+ Eur01A_PValueGC = PValueGC) %>% full_join(eur01B) %>% select(1:17, Eur01B_Num_Variants = VariantNumber,
+ Eur01B_PValue = PValue, Eur01B_FDR = P.fdr, Eur01B_PValueGC = PValueGC) %>% full_join(eur01C) %>%
+ select(1:21, Eur01C_Num_Variants = VariantNumber, Eur01C_PValue = PValue, Eur01C_FDR = P.fdr,
+ Eur01C_PValueGC = PValueGC) %>% full_join(eur01D) %>% select(1:25, Eur01D_Num_Variants = VariantNumber,
+ Eur01D_PValue = PValue, Eur01D_FDR = P.fdr, Eur01D_PValueGC = PValueGC) %>% full_join(eur02A) %>%
+ select(1:29, Eur02A_Num_Variants = VariantNumber, Eur02A_PValue = PValue, Eur02A_FDR = P.fdr,
+ Eur02A_PValueGC = PValueGC) %>% full_join(eur02B) %>% select(1:33, Eur02B_Num_Variants = VariantNumber,
+ Eur02B_PValue = PValue, Eur02B_FDR = P.fdr, Eur02B_PValueGC = PValueGC) %>% full_join(eur03A) %>%
+ select(1:37, Eur03A_Num_Variants = VariantNumber, Eur03A_PValue = PValue, Eur03A_FDR = P.fdr,
+ Eur03A_PValueGC = PValueGC) %>% full_join(eur03B) %>% select(1:41, Eur03B_Num_Variants = VariantNumber,
+ Eur03B_PValue = PValue, Eur03B_FDR = P.fdr, Eur03B_PValueGC = PValueGC) %>% full_join(eur04) %>%
+ select(1:45, Eur04_Num_Variants = VariantNumber, Eur04_PValue = PValue, Eur04_FDR = P.fdr,
+ Eur04_PValueGC = PValueGC) %>% full_join(eur05A) %>% select(1:49, Eur05A_Num_Variants = VariantNumber,
+ Eur05A_PValue = PValue, Eur05A_FDR = P.fdr, Eur05A_PValueGC = PValueGC) %>% full_join(eur05B) %>%
+ select(1:53, Eur05B_Num_Variants = VariantNumber, Eur05B_PValue = PValue, Eur05B_FDR = P.fdr,
+ Eur05B_PValueGC = PValueGC) %>% full_join(eur05C) %>% select(1:57, Eur05C_Num_Variants = VariantNumber,
+ Eur05C_PValue = PValue, Eur05C_FDR = P.fdr, Eur05C_PValueGC = PValueGC)5 Perform Meta Analysis Data
Meta-analysis is based on raw P values in the subcohorts, except for the Eur02B (lambda1000: 1.320551) and Hispanic (lambda1000: 1.1369632) subcohorts, which use genomic-control-corrected P values.
5.1 add further columns for Meta-Analysis P Values
5.2 define weights for meta analysis
> # 5.2 Meta-analysis weights: sqrt of each cohort's total sample size
> # (cases + controls), in the SAME cohort order as the P value vectors
> # assembled per gene in the meta-analysis loop (METAL sample-size
> # weighting scheme).
> sampleSizeAll=c(sqrt(15221), #asian
+ sqrt(5734), #african
+ sqrt(2551), #hispanic
+ sqrt(11152), #eur01A
+ sqrt(9425), #eur01B
+ sqrt(11404), #eur01C
+ sqrt(9564), #eur01D
+ sqrt(9700), #eur02A
+ sqrt(10300), #eur02B
+ sqrt(8521), #eur03A
+ sqrt(7704), #eur03B
+ sqrt(14515), #eur04
+ sqrt(9567), #eur05A
+ sqrt(8830), #eur05B
+ sqrt(8992)) #eur05C
>
> # weights restricted to the European subcohorts only
> sampleSizeEuro=c(sqrt(11152), #eur01A
+ sqrt(9425), #eur01B
+ sqrt(11404), #eur01C
+ sqrt(9564), #eur01D
+ sqrt(9700), #eur02A
+ sqrt(10300), #eur02B
+ sqrt(8521), #eur03A
+ sqrt(7704), #eur03B
+ sqrt(14515), #eur04
+ sqrt(9567), #eur05A
+ sqrt(8830), #eur05B
+ sqrt(8992)) #eur05C5.3 perform meta-analysis
> # 5.3 Per-gene meta-analysis across cohorts. Raw P values are used except
> # for Hispanic and Eur02B, where GC-corrected P values are used (inflated
> # lambda1000, see section 5). Two methods per gene: Stouffer's weighted Z
> # (metap::sumz) and the METAL sample-size-based method, each for all
> # cohorts and for the European subset.
> # FIX(review): loop bound now taken from 'Meta' (the object indexed and
> # written inside the loop) instead of 'assocs'.
> for (i in seq_len(nrow(Meta))) {
+ # temporary data for metaP value calculation for all cohorts
+ tmpAll = Meta %>% slice(i) %>% select(Asian_PValue, African_PValue, Hispanic_PValueGC,
+ Eur01A_PValue, Eur01B_PValue, Eur01C_PValue, Eur01D_PValue, Eur02A_PValue,
+ Eur02B_PValueGC, Eur03A_PValue, Eur03B_PValue, Eur04_PValue, Eur05A_PValue,
+ Eur05B_PValue, Eur05C_PValue) %>% unlist()
+
+ # temporary data for metaP value calculation for European cohorts
+ tmpEuro = Meta %>% slice(i) %>% select(Eur01A_PValue, Eur01B_PValue, Eur01C_PValue,
+ Eur01D_PValue, Eur02A_PValue, Eur02B_PValueGC, Eur03A_PValue, Eur03B_PValue,
+ Eur04_PValue, Eur05A_PValue, Eur05B_PValue, Eur05C_PValue) %>% unlist()
+
+ # meta analysis using Stouffer; na.action = na.omit removes missing P
+ # values before combining -- NOTE(review): confirm metap::sumz keeps the
+ # weights aligned with the retained P values when NAs are dropped
+ Meta$StoufferMetaP[i] = sumz(p = tmpAll, weights = sampleSizeAll, na.action = na.omit)$p
+ Meta$StoufferMetaEuropeanP[i] = sumz(p = tmpEuro, weights = sampleSizeEuro, na.action = na.omit)$p
+
+ # meta analysis using METAL method
+ Meta$MetalMetaP[i] = METAL_Sample_size_based_meta_PValue(pvalues = tmpAll, weights = sampleSizeAll)
+ Meta$MetalMetaEuropeanP[i] = METAL_Sample_size_based_meta_PValue(pvalues = tmpEuro,
+ weights = sampleSizeEuro)
+
+ Meta$Number_missingPvalues[i] = sum(is.na(tmpAll))
+ }
>
> # Benjamini-Hochberg FDR across all genes, per meta-analysis P value.
> # FIX(review): column name typo 'MetalrMetaP_FDR' corrected to
> # 'MetalMetaP_FDR', consistent with 'MetalMetaEuropeanP_FDR'.
> Meta$StoufferMetaP_FDR = p.adjust(Meta$StoufferMetaP, method = "BH")
> Meta$StoufferMetaEuropeanP_FDR = p.adjust(Meta$StoufferMetaEuropeanP, method = "BH")
> Meta$MetalMetaP_FDR = p.adjust(Meta$MetalMetaP, method = "BH")
> Meta$MetalMetaEuropeanP_FDR = p.adjust(Meta$MetalMetaEuropeanP, method = "BH")